####################################
######## ISSP graphs ############
####################################

## Purpose: This code includes all the
## replication code for the supplementary
## ISSP analysis included in the SI,
## Section A.
## Data In:
## 1) GESIS ISSP datasets for 1987, 1992,
## 1999, 2009, and 2019. Follow the instructions
## in the README for downloaded these files and saving them
## in the appropriate place in the replication folder.
## 2) CEO pay estimates
## data_files/Share of Management and Professional Employment 1992_2019.xlsx
## Data Out:
## 1) figures/issp_sd_mean_party.pdf
## 2) figures/ave_occupation_pay_issp_alt.pdf
## 3) figures/four_measures_issp.pdf

#################################
### Libraries and Dependencies ##
#################################

library(tidyverse) ## 2.0.0
library(haven) ## 2.5.4
library(labelled) ## 2.13.0
library(openxlsx) ## 4.2.6.1
library(VGAM) ## 1.1.11
library(diagis) ## 0.2.3
library(stargazer) ## 5.2.3

## set working directory to Dataverse
## folder 

## CEO pay estimates from
## Economic Policy Institute,
## discussed in SI, section A

pay_estimates <- read.xlsx("data_files/Share of Management and Professional Employment 1992_2019.xlsx",
                           sheet = 3)

## MLE estimator for pareto dist. 
## MLE estimator for pareto dist. 
pareto.MLE <- function(X){
  n <- length(X)
  m <- min(X)
  a <- n/sum(log(X)-log(m))
  return( c(m,a) ) 
}
options(scipen=999)


#########################
## ISSP Data #############
##########################

## function for cleaning NA values for pay values
## and changing top coded values to top code
na_function <- function(x){
  x <- as.numeric(x)
  x <- na_if(x, 999997)
  x <- na_if(x, 99999999997)
  x <- na_if(x, 999998)
  if(x == 999996 &
     !is.na(x)){
    x <- 1000000
  }
  x <- na_if(x, 99999999998)
  x <- na_if(x, 999999)
  x <- na_if(x, 99999999999)
  if(x == 99999999996 &
     !is.na(x)){
    x <- 1000000
  }
  x <- na_if(x, -99)
  x <- na_if(x, -98)
  x <- na_if(x, -97)
  x <- na_if(x, -9)
  x <- na_if(x, -8)
  return(x)
}

###########################
## 1) 1987 ################
###########################

issp_1987 <- read_dta("ISSP_data/issp_1987/ZA1680.dta")

## limit to US respondents
issp_1987 <- issp_1987 %>%
  filter(v3 == 4)

## restrict to relevant variables
issp_1987 <- issp_1987 %>%
  dplyr::select(v30, ## payexec
         v27, ## paydoc
         ## paysales clerk
         v31, ## payskill
         v35, ## payunskill
         v82, ## age
         v97, ## party ID
         v88, ## highest degree
         v87, ## years of edu
         v93, ## income
         v83, ## sex
         v107) ## weight



## add year
issp_1987$year <- 1987

colnames(issp_1987) <- c("payexec",
                         "paydoc",
                         "payskill",
                         "payunskill",
                         "age",
                         "party_id",
                         "degree",
                         "years_edu",
                         "income",
                         "sex",
                         "weight",
                         "year")
issp_1987$paysales <- NA

## cleaning variables
## top coded value for all pay variables: 1000000
issp_1987$payexec <- unlist(lapply(issp_1987$payexec, function(x){
  na_function(x)
}))
issp_1987$paydoc <- unlist(lapply(issp_1987$paydoc, function(x){
  na_function(x)
}))
issp_1987$payskill <- unlist(lapply(issp_1987$payskill, function(x){
  na_function(x)
}))
issp_1987$payunskill <- unlist(lapply(issp_1987$payunskill, function(x){
  na_function(x)
}))

## Removing NAs from other variables
## cleaning alues
issp_1987$age <- as.numeric(na_if(issp_1987$age, 99))
issp_1987$party_id <- na_if(issp_1987$party_id, 98)
issp_1987$party_id <- na_if(issp_1987$party_id, 99)
issp_1987$party_id <- dplyr::recode(issp_1987$party_id,
                                    `1` = "Democrat",
                                    `2` = "Democrat",
                                    `3` = "Indep/Other",
                                    `4` = "Indep/Other",
                                    `5` = "Indep/Other",
                                    `6` = "Republican",
                                    `7` = "Republican",
                                    `95` = "Indep/Other")
issp_1987$degree <- dplyr::recode(issp_1987$degree,
                                  `1` = "HS or Less",
                                  `2` = "HS or Less",
                                  `3` = "HS or Less",
                                  `4` = "Some College",
                                  `5` = "Bachelor",
                                  `6` = "Graduate")

## Collapsing values of education variable
issp_1987$degree <- ifelse(issp_1987$degree == "HS or Less" &
                             (issp_1987$years_edu > 12 |
                                issp_1987$years_edu == 95 |
                                issp_1987$years_edu == 96),
                           "Some College", issp_1987$degree)
issp_1987$income <- dplyr::recode(issp_1987$income,
                                  `1` = "Less than 1000",
                                  `2` = "1000 - 2999",
                                  `3` = "3000 - 3999",
                                  `4` = "4000 - 4999",
                                  `5` = "5000 - 5999",
                                  `6` = "6000 - 6999",
                                  `7` = "7000 - 7999",
                                  `8` = "8000 - 9999",
                                  `9` = "10000 - 12499",
                                  `10` = "12500 - 14999",
                                  `11` = "15000 - 17499",
                                  `12` = "17500 - 19999",
                                  `13` = "20000 - 22499",
                                  `14` = "22500 - 24999",
                                  `15` = "25000 - 29999",
                                  `16` = "30000 - 34999",
                                  `17` = "35000 - 39999",
                                  `18` = "40000 - 49999",
                                  `19` = "50000 - 59999",
                                  `20` = "60000 and more")
issp_1987$sex <- as.character(haven::as_factor(issp_1987$sex))
issp_1987$years_edu <- as.numeric(issp_1987$years_edu)
issp_1987$weight <- as.numeric(issp_1987$weight)

## remove missing data, 1564 -> 1220
issp_1987 <- issp_1987[complete.cases(issp_1987[, c("payexec",
                                                    "paydoc",
                                                    "payskill",
                                                    "payunskill",
                                                    "age",
                                                    "party_id",
                                                    "degree",
                                                    "years_edu",
                                                    "income",
                                                    "sex",
                                                    "weight",
                                                    "year")]), ]

###########################
## 2) 1992 ################
###########################

issp_1992 <- read_dta("ISSP_data/issp_1992/ZA2310.dta")

## restrict to USA
issp_1992 <- issp_1992 %>%
  filter(v3 == 5)

issp_1992 <- issp_1992 %>%
  dplyr::select(v29, ## payexec
         v27, ## paydoc
         v28, ## paysales
         v35, ## payunskill -  updated
         v26, ## payskill- updated
         v100, ## age
         usa122, ## party ID
         usa103, ## highest degree
         v102, ## years of edu
         usa116, ## income
         v99, ## sex
         v176) ## weight

issp_1992$year <- 1992
colnames(issp_1992) <- c("payexec",
                         "paydoc",
                         "paysales",
                         "payunskill",
                         "payskill",
                         "age",
                         "party_id",
                         "degree",
                         "years_edu",
                         "income",
                         "sex",
                         "weight",
                         "year")


## cleaning variables
## top coded value for all pay variables: 1000000
issp_1992$payexec <- unlist(lapply(issp_1992$payexec, function(x){
  na_function(x)
}))
issp_1992$paydoc <- unlist(lapply(issp_1992$paydoc, function(x){
  na_function(x)
}))
issp_1992$payskill <- unlist(lapply(issp_1992$payskill, function(x){
  na_function(x)
}))
issp_1992$payunskill <- unlist(lapply(issp_1992$payunskill, function(x){
  na_function(x)
}))
issp_1992$paysales <- unlist(lapply(issp_1992$paysales, function(x){
  na_function(x)
}))

## removing NA values
## cleaning values
issp_1992$age <- as.numeric(na_if(issp_1992$age, 0))
issp_1992$age <- na_if(issp_1992$age, 99)
issp_1992$party_id <- as.character(haven::as_factor(issp_1992$party_id))
issp_1992$degree <- as.character(haven::as_factor(issp_1992$degree))
issp_1992$degree <- ifelse(issp_1992$degree == "High School" &
                             (issp_1992$years_edu > 12 |
                                issp_1992$years_edu == 95 |
                                issp_1992$years_edu == 96),
                           "Some College", issp_1992$degree)
issp_1992$income <- as.character(haven::as_factor(issp_1992$income))
issp_1992$sex <- as.character(haven::as_factor(issp_1992$sex))
issp_1992$years_edu <- as.numeric(issp_1992$years_edu)
issp_1992$weight <- as.numeric(issp_1992$weight)

## remove missing data, 1273 -> 1065
issp_1992 <- issp_1992[complete.cases(issp_1992[, c("payexec",
                                                    "paydoc",
                                                    "paysales",
                                                    "payskill",
                                                    "payunskill",
                                                    "age",
                                                    "party_id",
                                                    "degree",
                                                    "years_edu",
                                                    "income",
                                                    "sex",
                                                    "weight",
                                                    "year")]), ]

###########################
## 1999 ###################
###########################

issp_1999 <- read_sav("ISSP_data/issp_1999/ZA3430_2006-09-14.sav")

## US respondents
issp_1999 <- issp_1999 %>%
  filter(V3 == 6)

issp_1999 <- issp_1999 %>%
  dplyr::select(V16, ## payexec
         V15, ## paydoc
         V18, ## paysales
         V14, ## payskill
         V21, ## payunskill
         AGE, ## age
         X_PRTY, ## party ID
         DEGREE, ## highest degree
         EDUCYRS, ## years of edu
         INCOME, ## income
         SEX, ## sex
         WEIGHT) ## weight

issp_1999$year <- 1999
colnames(issp_1999) <- c("payexec",
                         "paydoc",
                         "paysales",
                         "payskill",
                         "payunskill",
                         "age",
                         "party_id",
                         "degree",
                         "years_edu",
                         "income",
                         "sex",
                         "weight",
                         "year")

## cleaning top coded values
issp_1999$payexec <- unlist(lapply(issp_1999$payexec, function(x){
  na_function(x)
}))
issp_1999$paydoc <- unlist(lapply(issp_1999$paydoc, function(x){
  na_function(x)
}))
issp_1999$paysales <- unlist(lapply(issp_1999$paysales, function(x){
  na_function(x)
}))
issp_1999$payskill <- unlist(lapply(issp_1999$payskill, function(x){
  na_function(x)
}))
issp_1999$payunskill <- unlist(lapply(issp_1999$payunskill, function(x){
  na_function(x)
}))
## top code value is 1,000,000

## removing NA values
issp_1999$age <- as.numeric(na_if(issp_1999$age, 98))
issp_1999$age <- na_if(issp_1999$age, 99)
issp_1999$party_id <- as.character(haven::as_factor(issp_1999$party_id))
issp_1999$degree <- as.character(haven::as_factor(issp_1999$degree)) ## already has some college category

issp_1999$income <- as.character(haven::as_factor(issp_1999$income))
issp_1999$income <- na_if(issp_1999$income, "Don't know")
issp_1999$income <- na_if(issp_1999$income, "Refused")
issp_1999$income <- na_if(issp_1999$income, "No answer")
issp_1999$income <- dplyr::recode(issp_1999$income,
                                  `Lowest decile` = "1",
                                  `Highest decile` = "10")
issp_1999$sex <- as.character(haven::as_factor(issp_1999$sex))
issp_1999$years_edu <- as.numeric(issp_1999$years_edu)
issp_1999$weight <- as.numeric(issp_1999$weight)

## remove missing cases 1272 -> 945
issp_1999 <- issp_1999[complete.cases(issp_1999[, c("payexec",
                                                    "paydoc",
                                                    "paysales",
                                                    "payskill",
                                                    "payunskill",
                                                    "age",
                                                    "party_id",
                                                    "degree",
                                                    "years_edu",
                                                    "income",
                                                    "sex",
                                                    "weight",
                                                    "year")]), ]


################################
## 2009 ########################
################################

issp_2009 <- read_dta("ISSP_data/issp_2009/ZA5400_v4-0-0.dta/ZA5400_v4-0-0.dta")

issp_2009 <- issp_2009 %>%
  filter(V5 == 840) ## limit to US


issp_2009 <- issp_2009 %>%
  dplyr::select(V23, ## payexec
         V22, ## paydoc
         V24, ## paysales
         ## payskill
         V25, ## payunskill
         AGE, ## age
         US_PRTY, ## party ID
         US_DEGR, ## highest degree
         EDUCYRS, ## years of edu
         US_INC, ## income
         SEX, ## sex
        WEIGHT) ## weight

issp_2009$year <- 2009
colnames(issp_2009) <- c("payexec",
                         "paydoc",
                         "paysales",
                         "payunskill",
                         "age",
                         "party_id",
                         "degree",
                         "years_edu",
                         "income",
                         "sex",
                         "weight",
                         "year")
issp_2009$payskill <- NA

## top coded value is much higher 99 million or more (previously 1 million or 
## more), no values have that
issp_2009$payexec <- unlist(lapply(issp_2009$payexec, function(x){
  na_function(x)
}))
issp_2009$paydoc <- unlist(lapply(issp_2009$paydoc, function(x){
  na_function(x)
}))
issp_2009$paysales <- unlist(lapply(issp_2009$paysales, function(x){
  na_function(x)
}))
issp_2009$payunskill <- unlist(lapply(issp_2009$payunskill, function(x){
  na_function(x)
}))
issp_2009$age <- as.numeric(na_if(issp_2009$age, 99))
issp_2009$party_id <- as.character(haven::as_factor(issp_2009$party_id))
issp_2009$degree <- as.character(haven::as_factor(issp_2009$degree))
issp_2009$degree <- ifelse(issp_2009$degree == "High school" &
                             (issp_2009$years_edu > 12 |
                                issp_2009$years_edu == 95 |
                                issp_2009$years_edu == 96),
                           "Some College", issp_2009$degree)
issp_2009$income <- as.character(haven::as_factor(issp_2009$income))
issp_2009$income <- na_if(issp_2009$income, "DK")
issp_2009$income <- na_if(issp_2009$income, "Refused")
issp_2009$sex <- as.character(haven::as_factor(issp_2009$sex))

issp_2009$years_edu <- as.numeric(issp_2009$years_edu)
issp_2009$weight <- as.numeric(issp_2009$weight)

## remove missing data 1581 -> 1306
issp_2009 <- issp_2009[complete.cases(issp_2009[, c("payexec",
                                                    "paydoc",
                                                    "paysales",
                                                    "payunskill",
                                                    "age",
                                                    "party_id",
                                                    "degree",
                                                    "years_edu",
                                                    "income",
                                                    "sex",
                                                    "weight",
                                                    "year")]), ]



#########################
## 2019
issp_2019 <- read_dta("ISSP_data/ISSP_2019/ZA7600_v3-0-0.dta/ZA7600_v3-0-0.dta")
issp_2019 <- issp_2019 %>%
  filter(country == 840)

issp_2019 <- issp_2019 %>%
  dplyr::select(v12, ## payexec
         v11, ## paydoc
         v13, ## paysales
         ## payskill
         v14, ## payunskill
         AGE, ## age
         US_PRTY, ## party ID
         US_DEGR, ## highest degree
         EDUCYRS, ## years of edu
         US_INC, ## income
         SEX, ## sex
         WEIGHT) ## weight 

issp_2019$year <- 2019

colnames(issp_2019) <- c("payexec",
                         "paydoc",
                         "paysales",
                         "payunskill",
                         "age",
                         "party_id",
                         "degree",
                         "years_edu",
                         "income",
                         "sex",
                         "weight",
                         "year")
issp_2019$payskill <- NA

## no top coded values for 2019 data 
issp_2019$payexec <- unlist(lapply(issp_2019$payexec, function(x){
  na_function(x)
}))
issp_2019$paydoc <- unlist(lapply(issp_2019$paydoc, function(x){
  na_function(x)
}))
issp_2019$paysales <- unlist(lapply(issp_2019$paysales, function(x){
  na_function(x)
}))
issp_2019$payunskill <- unlist(lapply(issp_2019$payunskill, function(x){
  na_function(x)
}))
issp_2019$age <- as.numeric(na_if(issp_2019$age, -9))
issp_2019$party_id <- as.character(haven::as_factor(issp_2019$party_id))
issp_2019$party_id <- na_if(issp_2019$party_id, "-9. No answer")
issp_2019$party_id <- na_if(issp_2019$party_id, "-4. NAP (Code 2, -4, -7 in VOTE_LE)")

## NOTE: 2019 is the only survey that did not ask about party affiliation
## the variable we label as party_id
## is a question about party R voted for in last election
## we include for lack of other variables but exclude in sensitivity checks

issp_2019$degree <- as.character(haven::as_factor(issp_2019$degree))
issp_2019$degree <- na_if(issp_2019$degree, "-9. No answer")
issp_2019$degree <- ifelse(issp_2019$degree == "2. High school" &
                             issp_2019$years_edu > 12,
                           "Some College", issp_2019$degree)
issp_2019$income <- as.character(haven::as_factor(issp_2019$income))
issp_2019$sex <- as.character(haven::as_factor(issp_2019$sex))
issp_2019$years_edu <- as.numeric(issp_2019$years_edu)
issp_2019$weight <- as.numeric(issp_2019$weight)

## remove missing data 1852 -> 1163
issp_2019 <- issp_2019[complete.cases(issp_2019[, c("payexec",
                                                    "paydoc",
                                                    "paysales",
                                                    "payunskill",
                                                    "age",
                                                    "party_id",
                                                    "degree",
                                                    "years_edu",
                                                    "income",
                                                    "sex",
                                                    "weight",
                                                    "year")]), ]

##########################################################
### Combining Files and Harmonizing Values ###############
##########################################################

## 5699 observations
issp <- bind_rows(issp_1987,
                  issp_1992,
                  issp_1999,
                  issp_2009,
                  issp_2019)

issp <- issp %>%
  dplyr::select(-years_edu)


## Harmonizing Pay Variables

## for years 87, 92, 99 the pay variables
## were top coded
## we adjust for the top coding with draws from a pareto distribution
## for all three years top coded value is 1,000,000
## versus for 2009 the "top code" was 99,999,999 (no values had that,
## highest value was 90,000,000)
## and for 2019 there was no top code, values extended to
## 10,000,000 (clumping around that value,
## but given that there is no documentation of 
## top code likely due to people picking that as a 
## common value)
## 1987 is missing the paysales variable 
## given these high values in later year, 
## we take draws from a pareto distribution
## estimated based on empirical distribution by year
## between top code value and 10x top code value 

issp <- split(issp, issp$year)

set.seed(97405)

for(i in 1:length(issp)){
  if(unique(issp[[i]]$year) %in% c(1987, 1992, 1999)){
    
    ## calculate maximum likelihood estimates for pareto dist.
    ## parameters for each pay variable
    pay_exec <- pareto.MLE(issp[[i]]$payexec)
    pay_doc <- pareto.MLE(issp[[i]]$paydoc)
    pay_skill <- pareto.MLE(issp[[i]]$payskill)
    pay_unskill <- pareto.MLE(issp[[i]]$payunskill)
    
    ## for each estimated distribution, get quantiles of distribution
    ## between top code and ten times top code
    exec_quantile <- VGAM::ppareto(c(1000000, 10000000),
                                   scale = pay_exec[1],
                                   shape = pay_exec[2])
    doc_quantile <- VGAM::ppareto(c(1000000, 10000000),
                                  scale = pay_doc[1],
                                  shape = pay_doc[2])
    skill_quantile <- VGAM::ppareto(c(1000000, 10000000),
                                    scale = pay_skill[1],
                                    shape = pay_skill[2])
    unskill_quantile <- VGAM::ppareto(c(1000000, 10000000),
                                      scale = pay_unskill[1],
                                      shape = pay_unskill[2])
    
    ## determine which rows equal top code
    exec_index <- which(issp[[i]]$payexec == 1000000)
    doc_index <- which(issp[[i]]$paydoc == 1000000)
    skill_index <- which(issp[[i]]$payskill == 1000000)
    unskill_index <- which(issp[[i]]$payunskill == 1000000)
    
    ## for each index, take draws from uniform distirbution
    ## between quantiles 
    exec_draws <- runif(length(exec_index), exec_quantile[1], exec_quantile[2])
    doc_draws <- runif(length(doc_index), doc_quantile[1], doc_quantile[2])
    skill_draws <- runif(length(skill_index), skill_quantile[1], skill_quantile[2])
    unskill_draws <- runif(length(unskill_index), unskill_quantile[1], unskill_quantile[2])
    
    ## transform draws into values using inverse cumulative distribution funct
    exec_values <- qpareto(exec_draws, pay_exec[1],
                           pay_exec[2])
    doc_values <- qpareto(doc_draws, pay_doc[1], pay_doc[2])
    skill_values <- qpareto(skill_draws, pay_skill[1], pay_skill[2])
    unskill_values <- qpareto(unskill_draws, pay_unskill[1], pay_unskill[2])
    
    ## add back to dataframe
    issp[[i]]$payexec_update <- issp[[i]]$payexec
    issp[[i]]$payexec_update[exec_index] <- exec_values
    
    issp[[i]]$paydoc_update <- issp[[i]]$paydoc
    issp[[i]]$paydoc_update[doc_index] <- doc_values
    
    issp[[i]]$payskill_update <- issp[[i]]$payskill
    issp[[i]]$payskill_update[skill_index] <- skill_values
    
    issp[[i]]$payunskll_update <- issp[[i]]$payunskill
    issp[[i]]$payunskll_update[unskill_index] <- unskill_values
    
    ## repeat steps for sales person, but excluding 1987
    ## because it didn't have that variable
    if(unique(issp[[i]]$year) != 1987){
      pay_sales <- pareto.MLE(issp[[i]]$paysales)
      sales_quantile <- VGAM::ppareto(c(1000000, 10000000),
                                      scale = pay_sales[1],
                                      shape = pay_sales[2])
      sales_index <- which(issp[[i]]$paysales == 1000000)
      sales_draws <- runif(length(sales_index), sales_quantile[1], sales_quantile[2])
      sales_values <- qpareto(sales_draws, pay_sales[1], pay_sales[2])
      
      issp[[i]]$paysales_update <- issp[[i]]$paysales
      issp[[i]]$paysales_update[sales_index] <- sales_values
    }
  } else {
    issp[[i]]$payexec_update <- issp[[i]]$payexec
    issp[[i]]$paydoc_update <- issp[[i]]$paydoc
    issp[[i]]$paysales_update <- issp[[i]]$paysales
    issp[[i]]$payskill_update <- issp[[i]]$payskill
    issp[[i]]$payunskll_update <- issp[[i]]$payunskill
  }
  
}

issp <- bind_rows(issp)


## Harmonizing values of education
## across waves
issp$degree <- dplyr::recode(issp$degree,
                             `1. Less than high school` = "HS or Less",
                             `2. High school` = "HS or Less",
                             `3. Junior college` = "Some College",
                             `4. Bachelor degree` = "Bachelor or More",
                             `High School` = "HS or Less",
                             `Incpl primary` = "HS or Less",
                             `Incpl secondary` = "HS or Less",
                             `Incpl university` = "Some College",
                             `Junior college` = "Some College",
                             `Less th High school` = "HS or Less",
                             `Less than High School` = "HS or Less",
                             `Primary compl` = "HS or Less",
                             `Secondary compl` = "HS or Less",
                             `University compl` = "Bachelor or More",
                             `5. Graduate school` = "Bachelor or More",
                             `Bachelor` = "Bachelor or More",
                             `Graduate` = "Bachelor or More",
                             `High school` = "HS or Less")
issp$degree <- factor(issp$degree,
                      levels = c("HS or Less",
                                 "Some College",
                                 "Bachelor or More"))

## harmonize age across survey waves
## we create birth year and cohort variables
issp$birth <- issp$year - issp$age
issp <- issp %>% mutate(cohort = case_when(birth < 1915 ~ "Before Greatest",
                                                      birth >=1915 & birth <1926 ~ "Greatest",
                                                      birth >= 1926 & birth <1945 ~ "Silent",
                                                      birth >= 1945 & birth <1965 ~ "Boomers",
                                                      birth >= 1965 & birth <1981 ~ "Gen-X",
                                                      birth >= 1981 ~ "Millenials"))
issp$cohort <- factor(issp$cohort,
                            levels = c("Boomers", "Before Greatest",
                                       "Greatest",
                                       "Silent", "Gen-X", "Millenials"),
                            labels = c("Boomers", "Before Greatest",
                                       "Greatest",
                                       "Silent", "Gen-X", "Millenials"))

## harmonize income 
## across survey waves
issp$income_unharmonized <- issp$income
issp$income <- na_if(issp$income, "Dont Know")
issp$income <- na_if(issp$income, "NA")
issp$income <- na_if(issp$income, "-9. No answer")
issp$income <- gsub("^\\$ ", "", issp$income)

issp$income <- dplyr::recode(issp$income,
                             `180000. 170.000 USD or more per year, before taxes` = 
                               "170.000 USD or more per year, before taxes",
                             `67500. 60.000-74.999 USD` = 
                               "60.000-74.999 USD",
                             `9000. 8.000-9.999 USD` = "8.000-9.999 USD",
                             `21750. 20.000-22.499 USD` = "20.000-22.499 USD",
                             `82500. 75.000-89.999 USD` = "75.000-89.999 USD",
                             `55000. 50.000-59.999 USD` = "50.000-59.999 USD",
                             `160000. 150.000-169.999 USD` = "150.000-169.999 USD",
                             `100000. 90.000-109.999 USD` = "90.000-109.999 USD",
                             `11250. 10.000-12.499 USD` = "10.000-12.499 USD",
                             `45000. 40.000-49.999 USD` = "40.000-49.999 USD",
                             `23750. 22.500-24.999 USD` = "22.500-24.999 USD",
                             `32500. 30.000-34.999 USD` = "30.000-34.999 USD",
                             `140000. 130.000-149.999 USD` = "130.000-149.999 USD",
                             `37500. 35.000-39.999 USD` = "35.000-39.999 USD",
                             `120000. 110.000-129.999 USD` = "110.000-129.999 USD",
                             `13750. 12.500-14.999 USD` = "12.500-14.999 USD",
                             `16750. 15.000-17.499 USD` = "15.000-17.499 USD",
                             `18750. 17.500-19.999 USD` = "17.500-19.999 USD",
                             `3500. 3.000-3.999 USD` = "3.000-3.999 USD",
                             `500. Under 1.000 USD per year, before taxes` = "Under 1.000 USD per year, before taxes",
                             `2000. 1.000-2.999 USD` = "1.000-2.999 USD",
                             `7500. 7.000-7.999 USD` = "7.000-7.999 USD",
                             `5500. 5.000-5.999 USD` = "5.000-5.999 USD",
                             `6500. 6.000-6.999 USD` = "6.000-6.999 USD",
                             `27500. 25.000-29.999 USD` = "25.000-29.999 USD")

issp$income <- gsub("to", "-", issp$income)
issp$income <- gsub("\\.|USD", "", issp$income)
issp$income <- gsub("and more|Less than|Under \\$|per year or over|Under |per year|or more per year, before taxes",
                    "", issp$income)
issp$income <- gsub(", before taxes", "", issp$income)
issp$income <- trimws(issp$income)
issp$income <- gsub("\\s", "", issp$income)


## We transform income into standardized
## income quintiles by transforming these bounded values
## (e.g. 10000 to 20000) into numeric values by taking
## draws between the bounded values for each observation.
## For top coded and bottom coded data we simply keep the 
## top coded and bottom coded values, as the variable we use
## in the analysis below is an income quintile variable. 

set.seed(97405)

issp$income <- unlist(lapply(issp$income, function(x){
  if(!is.na(x)){
    x <- unlist(str_split(x, "-"))
    if(length(x) > 1){
      x <- sample(x[1]:x[2],
                  1)
    }
  }
 return(x)
}))
issp$income <- as.numeric(issp$income)

## create income quintiles by year
issp <- split(issp, issp$year)
for(i in 1:length(issp)){
  quints <- quantile(issp[[i]]$income, seq(0,1,.2), na.rm = TRUE)
  issp[[i]]$income_quint <- ifelse(issp[[i]]$income < quints[2],
                                   "1st Quintile",
                                   ifelse(issp[[i]]$income < quints[3],
                                          "2nd Quintile",
                                          ifelse(issp[[i]]$income < quints[4],
                                                 "3rd Quintile",
                                                 ifelse(issp[[i]]$income < quints[5],
                                                        "4th Quintile",
                                                        "5th Quintile"))))
}
issp <- bind_rows(issp)
issp$income_quint <- factor(issp$income_quint,
                                    levels = c("1st Quintile",
                                               "2nd Quintile",
                                               "3rd Quintile",
                                               "4th Quintile",
                                               "5th Quintile"),
                                    ordered = TRUE)

## creating SES index
## to equal weight education and income quintile:
## (5/3 * education + income_quintile) / 2
## creates scale from 1 -5 - include as factor variable

issp$ses <- round((as.numeric(issp$degree) * 5/3 + 
                          as.numeric(issp$income_quint)) / 2)
issp$ses <- factor(issp$ses)

## party id
## Other party, refused na if
issp$party_id <- na_if(issp$party_id, "Other party, refused")
issp$party_id <- na_if(issp$party_id, "NAV")
issp$party_id <- na_if(issp$party_id, "NA")
issp$party_id <- dplyr::recode(issp$party_id,
                               `Not very strong democrat` = "Democrat",
                               `Independent, close to republican` = "Indep/Other",
                               `Independent` = "Indep/Other",
                               `Independent, close to democrat` = "Indep/Other",
                               `Not very strong republican` = "Republican",
                               `Strong democrat` = "Democrat",
                               `Strong republican` = "Republican",
                               `N very strg democrat` = "Democrat",
                               `INdep, close democr` = "Indep/Other",
                               `N very strg republ` = "Republican",
                               `Indep, close republ` = "Indep/Other",
                               `Independent, close Democrat` = "Indep/Other",
                               `Not very strong Democrat` = "Democrat",
                               `Strong Democrat` = "Democrat",
                               `Not very strong Republican` = "Republican",
                               `Other party` = "Indep/Other",
                               `Strong Republican` = "Republican",
                               `Independent, close Republican` = "Indep/Other",
                               `2. Democrat` = "Democrat",
                               `6. Republican` = "Republican",
                               `4. Other candidate` = "Indep/Other",
                               `Other Party` = "Indep/Other")

issp$sex <- na_if(issp$sex, "-9. No answer")
issp$sex <- tolower(issp$sex)
issp$sex <- gsub("[0-9]{1}\\.", "", issp$sex)
issp$sex <- trimws(issp$sex, "both")


##############################
#### Outliers ################
##############################

## Here we investigate outliers:

ggplot(issp,
       mapping = aes(x = paysales_update)) +
     # mapping = aes(x = payunskll_update)) +
      #mapping = aes(x = payskill_update)) +
       #mapping = aes(x = paydoc_update)) +
      # mapping = aes(x = payexec_update)) +
  geom_histogram() +
  scale_x_continuous(labels = scales::label_comma())+
  theme(axis.text = element_text(size = 5)) +
  facet_wrap(~ year,
             scales = "free")

## 2009 has huge outliers from payexec
## paydoc- almost a order
## of magnitude
## 2019 we observe the clumping around 10 million
## for payexec
## Almost every year-variable combination
## is a right tailed distribution with outliers
## As we discuss in the SI this is a common problem
## with these variables (e.g. saying sales clerk make
## 6 million). 

## We identify and remove these outliers
## for the analysis below. Our main conclusions
## are robust to including these outliers. If replicators
## wish to test this they can skip right to line 936
## where we log all variables. 
 
## Outliers: greater the Q3 + 1.5*IQR
## for each variable-year
outliers <- issp %>%
  dplyr::select(paydoc_update,
         paysales_update,
         payskill_update,
         payunskll_update,
         payexec_update,
         year) %>%
  gather(key = "measure",
         value = "dollar",
         -year) %>%
  group_by(measure, year) %>%
  summarize(iqr = IQR(dollar, na.rm = TRUE),
            quart= quantile(dollar,
                            na.rm =TRUE)[4],
            outlier = quart + 1.5*iqr) %>%
  dplyr::select(measure, year, outlier) %>%
  spread(key = "measure",
         value = "outlier")
colnames(outliers)[2:6] <- paste(colnames(outliers)[2:6],
                                 "outliers", sep = "_")

issp <- left_join(issp, outliers, by = "year")



## removing outliers
issp$paydoc_update <- ifelse(issp$paydoc_update > issp$paydoc_update_outliers,
                              NA,
                              issp$paydoc_update)
issp$payexec_update <- ifelse(issp$payexec_update > issp$payexec_update_outliers,
                             NA,
                             issp$payexec_update)
issp$paysales_update <- ifelse(issp$paysales_update > issp$paysales_update_outliers,
                              NA,
                              issp$paysales_update)
issp$payskill_update <- ifelse(issp$payskill_update > issp$payskill_update_outliers,
                               NA,
                               issp$payskill_update)
issp$payunskll_update <- ifelse(issp$payunskll_update > issp$payunskll_update_outliers,
                               NA,
                               issp$payunskll_update)


tapply(issp$payskill_update, issp$year, function(x){sum(is.na(x))})
tapply(issp$paysales_update, issp$year, function(x){sum(is.na(x))})
tapply(issp$paydoc_update, issp$year, function(x){sum(is.na(x))})
tapply(issp$payexec_update, issp$year, function(x){sum(is.na(x))})
tapply(issp$payunskll_update, issp$year, function(x){sum(is.na(x))})


## log all perception variables 
issp$payexec_update <- log(issp$payexec_update + 1)
issp$paydoc_update <- log(issp$paydoc_update + 1)
issp$paysales_update <- log(issp$paysales_update + 1)
issp$payskill_update <- log(issp$payskill_update + 1)
issp$payunskll_update <- log(issp$payunskll_update + 1)

############################################
#### Creating Perception Variables #########
##########################################

## Here we create all of our main perceptual outcome
## variables. We create different variations of each variable 
## depending on the years compared, as different variables
## are available in different years:

## 1987 - excludes sales
## 1992, 1999 - none missing 
## 2009, 2019 - excludes skilled worker

## 1) measure 1:
## standard deviation / mean

## 92, 99 only - all 5 occupations
issp$rowSD_92_99 <- apply(issp, 1, function(x){
  x <- x[13:17]
  x <- as.numeric(x)
  sd(x)
})
issp$rowMean_92_99 <- apply(issp, 1, function(x){
  x <- x[13:17]
  x <- as.numeric(x)
  mean(x)
})
issp$coefVar_92_99 <- issp$rowSD_92_99 / issp$rowMean_92_99

## 92 - 2019, all occupation except skilled factory worker
issp$rowSD_92_19 <- apply(issp, 1, function(x){
  x <- x[c(13,14,16,17)]
  x <- as.numeric(x)
  sd(x)
})
issp$rowMean_92_19 <- apply(issp, 1, function(x){
  x <- x[c(13,14,16,17)]
  x <- as.numeric(x)
  mean(x)
})
issp$coefVar_92_19 <- issp$rowSD_92_19 / issp$rowMean_92_19


## 3) 87 - 1999, all occupations except sales clerk
issp$rowSD_87_99 <- apply(issp, 1, function(x){
  x <- x[c(13:16)]
  x <- as.numeric(x)
  sd(x)
})
issp$rowMean_87_99 <- apply(issp, 1, function(x){
  x <- x[c(13:16)]
  x <- as.numeric(x)
  mean(x)
})

issp$coefVar_87_99 <- issp$rowSD_87_99 / issp$rowMean_87_99



## 2) Subjective Gini, 92-99 (all occupations)
## measure from Kuhn 2011 "In the Eye of the Beholder""
## We calculate our estimate population share of blue
## collar workers based on Bureau of Labor Statistics
## Data included in replication folder, 
## "data_files/Share of Management and Professional Employment 1992_2019"
## see first tab, column "Proportion Other"
## copied here:

blue_collar <- data.frame(year = c(1987, 1992, 1999, 2009, 2019),
                          share_blue_collar = c(0.704517965,
                            0.687897355,
                                    0.66015713,
                                    0.626683467,
                                    0.592366819))
issp <- left_join(issp,
                  blue_collar,
                  by = "year")

## average of blue collar and white collar occupations:
## 1992-1999 measure:
issp$sub_top_92_99 <- 1/2 * (issp$payexec_update+
                         issp$paydoc_update)
issp$sub_bottom_92_99 <- 1/3 * (issp$paysales_update + 
                            issp$payskill_update +
                            issp$payunskll_update)

## 1992-2019 measure
issp$sub_top_92_19 <- 1/2 * (issp$payexec_update+
                               issp$paydoc_update)
issp$sub_bottom_92_19 <- 1/2 * (issp$paysales_update + 
                                  issp$payunskll_update)

## 87-1999 measure
issp$sub_top_87_99 <- 1/2 * (issp$payexec_update+
                               issp$paydoc_update)
issp$sub_bottom_87_99 <- 1/2 * (issp$payskill_update + 
                                  issp$payunskll_update)

## estimate for total population wages
## weighted average of occupation means by population weights
issp$sub_total_92_99 <- (issp$share_blue_collar * issp$sub_bottom_92_99) + 
  ((1-issp$share_blue_collar) * issp$sub_top_92_99)
issp$sub_total_92_19 <- (issp$share_blue_collar * issp$sub_bottom_92_19) + 
  ((1-issp$share_blue_collar) * issp$sub_top_92_19)
issp$sub_total_87_99 <- (issp$share_blue_collar * issp$sub_bottom_87_99) + 
  ((1-issp$share_blue_collar) * issp$sub_top_87_99)


## estimated wage share of bottom group
issp$q_bottom_92_99 <- (issp$share_blue_collar * issp$sub_bottom_92_99) / 
  issp$sub_total_92_99
issp$q_bottom_92_19 <- (issp$share_blue_collar * issp$sub_bottom_92_19) / 
  issp$sub_total_92_19
issp$q_bottom_87_99 <- (issp$share_blue_collar * issp$sub_bottom_87_99) / 
  issp$sub_total_87_99

## subjective gini: difference between population
## share and wage share of bottom group
## smaller values - more proportional sharing of wage
## income (less inequality)
issp$sub_gini_92_99 <- issp$share_blue_collar - issp$q_bottom_92_99
issp$sub_gini_92_19 <- issp$share_blue_collar - issp$q_bottom_92_19
issp$sub_gini_87_99 <- issp$share_blue_collar - issp$q_bottom_87_99



## Measure 3: Ratio Measures

## 92-99
issp$worker_mean <- (issp$payskill_update +
                       issp$payunskll_update +
                       issp$paysales_update) / 3
issp$corporate_ratio <- issp$payexec_update / issp$worker_mean
issp$doctor_ratio <- issp$paydoc_update / issp$worker_mean

## 92-2019
issp$worker_mean <- (issp$payunskll_update +
                       issp$paysales_update) / 2
issp$corporate_ratio_92_19 <- issp$payexec_update / issp$worker_mean
issp$doctor_ratio_92_19 <- issp$paydoc_update / issp$worker_mean

## 87-99
issp$worker_mean <- (issp$payunskll_update +
                       issp$payskill_update) / 2
issp$corporate_ratio_87_99 <- issp$payexec_update / issp$worker_mean
issp$doctor_ratio_87_99 <- issp$paydoc_update / issp$worker_mean


## alternative ratio measure - available all years 
issp$corporate_ratio_all_years <- issp$payexec_update / issp$payunskll_update
issp$doctor_ratio_all_years <- issp$paydoc_update / issp$payunskll_update
issp$doctor_ratio_all_years[issp$payunskll_update == 0] <- NA

## skilled, sales - for comparison
issp$corporate_ratio_skilled <- issp$payexec_update / issp$payskill_update
issp$corporate_ratio_skilled[issp$payskill_update == 0] <- NA

issp$corporate_ratio_sales <- issp$payexec_update / issp$paysales_update
issp$corporate_ratio_sales[issp$paysales_update == 0] <- NA

####################################################
#### Visualizing Overtime Trend ####################
####################################################

## Figure A.1 in the SI:
## We display estimates for each variable over time 

overtime <- issp %>%
  dplyr::select(year, weight,
         coefVar_92_99,
         coefVar_92_19,
         coefVar_87_99,
         sub_gini_92_99,
         sub_gini_92_19,
         sub_gini_87_99,
         corporate_ratio,
         corporate_ratio_all_years,
         corporate_ratio_92_19,
         corporate_ratio_87_99,
         corporate_ratio_skilled,
         corporate_ratio_sales,
         doctor_ratio,
         doctor_ratio_92_19,
         doctor_ratio_87_99,
         doctor_ratio_all_years) %>%
  gather(key = "measure",
         value = "est",
         -year, -weight)



overall_mean <- overtime %>%
  group_by(year, measure) %>%
  summarise(mean = stats::weighted.mean(est, w = weight,
                                        na.rm = TRUE),
            se = weighted_se(est,
                             w = weight,
                             na.rm = TRUE),
            lower = mean + qnorm(.025)*se,
            upper = mean + qnorm(.975)*se)

overall_mean <- na.omit(overall_mean)


overall_mean$measure_label <- dplyr::recode(
  overall_mean$measure,
  `coefVar_92_99` = "Five Occupations",
  `coefVar_92_19` = "Four Occupations\n(Exclude Skilled)",
  `coefVar_87_99` = "Four Occupations\n(Exclude Sales Clerk)",
  `corporate_ratio` = "Five Occupations",
  `corporate_ratio_all_years` = "Three Occupations\n(Exclude Skilled, Sales)",
  `corporate_ratio_92_19` = "Four Occupations\n(Exclude Skilled)",
  `corporate_ratio_87_99` = "Four Occupations\n(Exclude Sales Clerk)",
  `doctor_ratio` = "Five Occupations",
  `doctor_ratio_all_years` = "Three Occupations\n(Exclude Skilled, Sales)",
  `doctor_ratio_92_19` = "Four Occupations\n(Exclude Skilled)",
  `doctor_ratio_87_99` = "Four Occupations\n(Exclude Sales Clerk)",
  `sub_gini_92_99` = "Five Occupations",
  `sub_gini_92_19` = "Four Occupations\n(Exclude Skilled)",
  `sub_gini_87_99` = "Four Occupations\n(Exclude Sales Clerk)",
  `corporate_ratio_sales` = "Ratio CEO:\nSales",
  `corporate_ratio_skilled` = "Ratio CEO:\nSkilled"
)

overall_mean$type <- gsub("_sales|_[0-9].*|_sales|_all_years",
                          "",
                          overall_mean$measure)

overall_mean$type <- dplyr::recode(overall_mean$type,
                                   `coefVar` = "Coefficient of Variation",
                                   `corporate_ratio` = "Ratio CEO Pay",
                                   `doctor_ratio` = "Ratio Doctor Pay",
                                   `corporate_ratio_skilled` = "Ratio CEO Pay",
                                   `sub_gini` = "Subjective Gini")


## Figure 1
## aggregate over time 

'%ni%' <- Negate("%in%")
ggplot(overall_mean[overall_mean$measure_label %ni%
                      c("Ratio CEO:\nSales",
                        "Ratio CEO:\nSkilled"), ],
       aes(x = factor(year),
           y = mean,
           color = measure_label)) +
  facet_wrap(~type,
             scales = "free_y",
             nrow = 3) +
  geom_point() +
  geom_errorbar(mapping = aes(ymin = lower,
                              ymax = upper),
                width = .2) +
  theme_bw() +
  labs(x = NULL,
       y = "Variable Scale",
       color = NULL) +
  theme(legend.position = "bottom") +
  scale_color_manual(values = c("#a6cee3",
                                "#1f78b4",
                                "#b2df8a",
                                "#33a02c"))
## figures/four_measures_issp.pdf
## 5x7

## diff-in-means estimates, comparing
## 1992 - 1999 for each variable. 
## We include these estimates in 
## Table 1. 

lm1 <- lm(coefVar_92_99 ~ year, data = issp,
          weights = issp$weight)
lm2 <- lm(sub_gini_92_99 ~ year, data = issp,
          weights = issp$weight)
lm3 <- lm(corporate_ratio ~ year, data = issp,
          weights = issp$weight)
lm4 <- lm(doctor_ratio ~ year, data = issp,
          weights = issp$weight)


stargazer(lm1, lm2, lm3, lm4)


## Figure A.2: 
## In this figure we compare estimates
## for the CEO and doctor pay ratios with
## trends over time in CEO compensation

p1 <- ggplot(data = pay_estimates[pay_estimates$Year >= 1987 &
                                    pay_estimates$Year <= 2019, ],
             mapping = aes(x = Year,
                           y = `Realized.CEO.compensation`)) +
  geom_point() + 
  geom_line() +
  theme_bw() +
  labs(x = NULL,
       color = NULL,
       y = "Average Realized CEO Compensation\nBureau of Labor Statistics")

p2 <- ggplot(overall_mean[overall_mean$type %in% c("Ratio Doctor Pay",
                                             "Ratio CEO Pay") &
                      overall_mean$measure_label == "Three Occupations\n(Exclude Skilled, Sales)", ],
       mapping = aes(x = year,
                     y = mean,
                     ymin = lower,
                     ymax = upper,
                     color = type)) +
  geom_point() +
  geom_errorbar(width = 1) +
  theme_bw() +
  theme(legend.position = "top") +
  scale_color_brewer(type = "qual") +
  labs(x = NULL,
       color = NULL,
       y = "Ratio Perceived CEO/Doctor Pay\nto Unskilled Pay")

gridExtra::grid.arrange(p2,p1)
##figures/ave_occupation_pay_issp_alt.pdf
## 10x7

## Figure A.3
## In this figure we replicate our main
## findings (asymmetric partisan polarization
## in inequality perceptions) using our constructed
## variables from ISSP 

issp$year <- as.factor(as.character(issp$year))

## We restrict the considered variables to just those
## with complete variable observations from 87, 92, 99 
## (i.e. all pay variables making up the variables availalbe
## in all three years) 
 
gini_party <- lm(sub_gini_87_99 ~ factor(year)*party_id + ses + cohort + sex, data = issp)
coef_party <- lm(coefVar_87_99 ~ factor(year)*party_id + ses + cohort + sex, data = issp)
corporate_party <- lm(corporate_ratio_87_99 ~ factor(year)*party_id + ses + cohort + sex, data = issp)
doctor_party <- lm(doctor_ratio_87_99 ~ factor(year)*party_id + ses + cohort + sex, data = issp)

## To display our difference in means test (92-99),
## for each of these variables we restrict the years considered
## to 92-99 and observations to identified Republicans,
## so that the coefficient on year displays
## the difference in means between 92, 99 for Republicans,
## holding other variables
## constant
gini_party2 <- lm(sub_gini_87_99 ~ factor(year)+ ses + cohort + sex, 
                 data = issp[issp$year %in% c(1992, 1999) &
                               issp$party_id == "Republican", ])
coef_party2 <- lm(coefVar_87_99 ~ factor(year)+ ses + cohort + sex,
                 data = issp[issp$year %in% c(1992, 1999) &
                               issp$party_id == "Republican", ])
corporate_party2 <- lm(corporate_ratio_87_99 ~ factor(year) + ses + cohort + sex, 
                      data = issp[issp$year %in% c(1992, 1999) &
                                    issp$party_id == "Republican", ])
doctor_party2 <- lm(doctor_ratio_87_99 ~ factor(year) + ses + cohort + sex,
                   data = issp[issp$year %in% c(1992, 1999) &
                                 issp$party_id == "Republican", ])

## results displayed in Table 2 in SI Section A
stargazer(gini_party2,
         coef_party2,
         corporate_party2,
         doctor_party2)

## Here we run the same results for Democrats 
## don't observe same significant decline: 
lm1 <- lm(sub_gini_87_99 ~ factor(year)+ ses + cohort + sex, 
                  data = issp[issp$year %in% c(1992, 1999) &
                                issp$party_id == "Democrat", ])
lm2 <- lm(coefVar_87_99 ~ factor(year)+ ses + cohort + sex,
                  data = issp[issp$year %in% c(1992, 1999) &
                                issp$party_id == "Democrat", ])
lm3 <- lm(corporate_ratio_87_99 ~ factor(year) + ses + cohort + sex, 
                       data = issp[issp$year %in% c(1992, 1999) &
                                     issp$party_id == "Democrat", ])
lm4 <- lm(doctor_ratio_87_99 ~ factor(year) + ses + cohort + sex,
                    data = issp[issp$year %in% c(1992, 1999) &
                                  issp$party_id == "Democrat", ])
## results referenced in SI Section A, 
## but not displayed
stargazer(lm1, lm2, lm3, lm4)

## Figure A.3: We display estimated values
## for one variable (coefficient of variation)
## by year, partisanship 
## For calculating estimated values 
## we use same strategy outlined in 
## 03_analysis_full_data_ev.R

vcov <- vcov(coef_party)
betas <- coef_party$coefficients


set.seed(08540)
beta.draws <- MASS::mvrnorm(10000, mu = betas, Sigma = vcov)

matrix <- matrix(0, nrow = 9, 
                 ncol = length(beta.draws[1,]))
matrix[,1] <- 1
colnames(matrix) <- names(beta.draws[1,])
rownames(matrix) <- 1:nrow(matrix)

## 
matrix[, "factor(year)1992"] <- c(rep(0,3), rep(1,3), rep(0,3))
matrix[, "factor(year)1999"] <- c(rep(0,6), rep(1,3))
matrix[c(2,5,8), "party_idIndep/Other"] <- 1
matrix[c(3,6,9), "party_idRepublican"] <- 1
matrix[5, "factor(year)1992:party_idIndep/Other"] <- 1
matrix[6, "factor(year)1992:party_idRepublican"] <- 1
matrix[8, "factor(year)1999:party_idIndep/Other"] <- 1
matrix[9, "factor(year)1999:party_idRepublican"] <- 1

sim <- base::apply(beta.draws, 1, function(x){
  vector <- matrix %*% x
  return(vector)
})
sim <- t(sim)
sim <- as.data.frame(sim)

estimates.fct <- function(frame){
  est <- as.data.frame(matrix(NA, ncol = 4, nrow = ncol(frame)))
  colnames(est) <- c("Est", "Lower", "Upper", "Model")
  est$Est <- base::apply(frame, 2, function(x){mean(x)})
  est$Lower <- base::apply(frame, 2, function(x){quantile(x, .025)})
  est$Upper <- base::apply(frame, 2, function(x){quantile(x, .975)})
  est$Model <- colnames(frame)  
  return(est)
}


estimates.issp <- estimates.fct(sim)
estimates.issp$year <- c(rep(1987,3),
                         rep(1992, 3),
                            rep(1999, 3))
estimates.issp$year <- factor(estimates.issp$year)
estimates.issp$party <- rep(c("Democrat", "Independent",
                              "Republican"), 3)


ggplot(estimates.issp,
       aes(x = year,
           y = Est,
           color = party,
           group = party,
           ymin = Lower,
           ymax = Upper)) +
  geom_point(position = position_dodge(.2)) +
  geom_errorbar(width = .1,
                position = position_dodge(.2)) +
  scale_color_manual(values = c("#2c7fb8",
                                "#31a354",
                                "#f03b20")) +
  theme_bw() +
  labs(x = "Year of Survey",
       y = "Respondent Perceived Occupational Pay\nStandard Deviation Divided by Mean",
       color = NULL)
## figures/issp_sd_mean_party.pdf